import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_error
from math import sqrt
import warnings
from tabulate import tabulate
warnings.simplefilter('ignore')
df = pd.read_csv('ME.csv')
df.head(10)
| Fecha | skill | starttime | ATEND | ABANDO | ANTED_20S | |
|---|---|---|---|---|---|---|
| 0 | 2023-01-01 | 1205 | 00:00:00 | 0 | 0 | 0 |
| 1 | 2023-01-01 | 1201 | 00:00:00 | 1 | 0 | 1 |
| 2 | 2023-01-01 | 1202 | 00:00:00 | 0 | 0 | 0 |
| 3 | 2023-01-01 | 1203 | 00:00:00 | 0 | 0 | 0 |
| 4 | 2023-01-01 | 1202 | 00:30:00 | 0 | 0 | 0 |
| 5 | 2023-01-01 | 1201 | 00:30:00 | 0 | 0 | 0 |
| 6 | 2023-01-01 | 1205 | 00:30:00 | 0 | 0 | 0 |
| 7 | 2023-01-01 | 1203 | 00:30:00 | 0 | 0 | 0 |
| 8 | 2023-01-01 | 1205 | 01:00:00 | 0 | 0 | 0 |
| 9 | 2023-01-01 | 1201 | 01:00:00 | 0 | 0 | 0 |
df.isnull().sum()
Fecha 0 skill 0 starttime 0 ATEND 0 ABANDO 0 ANTED_20S 0 dtype: int64
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 41128 entries, 0 to 41127 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Fecha 41128 non-null object 1 skill 41128 non-null int64 2 starttime 41128 non-null object 3 ATEND 41128 non-null int64 4 ABANDO 41128 non-null int64 5 ANTED_20S 41128 non-null int64 dtypes: int64(4), object(2) memory usage: 1.9+ MB
df.describe()
| skill | ATEND | ABANDO | ANTED_20S | |
|---|---|---|---|---|
| count | 41128.000000 | 41128.000000 | 41128.000000 | 41128.000000 |
| mean | 1202.750000 | 5.249368 | 0.055874 | 4.737819 |
| std | 1.479038 | 8.682921 | 0.406371 | 7.825416 |
| min | 1201.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 1201.750000 | 0.000000 | 0.000000 | 0.000000 |
| 50% | 1202.500000 | 1.000000 | 0.000000 | 1.000000 |
| 75% | 1203.500000 | 7.000000 | 0.000000 | 6.000000 |
| max | 1205.000000 | 92.000000 | 19.000000 | 79.000000 |
df.skew()
<ipython-input-19-9e0b1e29546f>:1: FutureWarning: The default value of numeric_only in DataFrame.skew is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning. df.skew()
skill 0.434667 ATEND 2.551994 ABANDO 16.079719 ANTED_20S 2.541835 dtype: float64
df_reemplazado = df.replace({1205: 'A', 1201: 'B',1202: 'C',1203: 'D'})
df_g = df.groupby(['Fecha'])['ATEND', 'ABANDO', 'ANTED_20S'].sum()
fig, ax = plt.subplots(figsize=(20, 6))
sns.lineplot(x='Fecha', y='ATEND', data = df,ax=ax, label='Llamadas Atendidas')
sns.lineplot(x='Fecha', y='ABANDO', data = df,ax=ax, label='Llamadas Abandonadas')
sns.lineplot(x='Fecha', y='ANTED_20S', data = df,ax=ax, label = 'Llamadas Atendidas 20s')
plt.xlabel('Fecha')
plt.ylabel('Valor')
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.show()
datos_agrupados_Atend = df_reemplazado.groupby(['Fecha', 'skill'])['ATEND'].sum().reset_index()
datos_agrupados_Abandon = df_reemplazado.groupby(['Fecha', 'skill'])['ABANDO'].sum().reset_index()
datos_agrupados_Atend20s = df_reemplazado.groupby(['Fecha', 'skill'])['ANTED_20S'].sum().reset_index()
fig, ax = plt.subplots(2, 3, figsize=(30, 10))
sns.lineplot(x='Fecha', y='ATEND', data=df, ax=ax[0,0])
sns.lineplot(x='Fecha', y='ABANDO', data=df, ax=ax[0,1])
sns.lineplot(x='Fecha', y='ANTED_20S', data=df, ax=ax[0,2])
sns.lineplot(x='Fecha', y='ATEND',hue='skill', data=datos_agrupados_Atend, ax=ax[1,0])
sns.lineplot(x='Fecha', y='ABANDO',hue='skill', data=datos_agrupados_Abandon, ax=ax[1,1])
sns.lineplot(x='Fecha', y='ANTED_20S',hue='skill', data=datos_agrupados_Atend20s, ax=ax[1,2])
<Axes: xlabel='Fecha', ylabel='ANTED_20S'>
plt.figure(figsize=(10, 10))
sns.distplot(df['ATEND'], hist_kws={'color': 'blue', 'label': 'ATEND'})
sns.distplot(df['ABANDO'], hist_kws={'color': 'green', 'label': 'ABANDO'})
sns.distplot(df['ANTED_20S'], hist_kws={'color': 'red', 'label': 'ANTED_20S'})
plt.title("Histograma General")
plt.xlabel("Valores")
plt.ylabel("Frecuencia")
plt.show()
fig, ax = plt.subplots(1, 3, figsize=(20, 10))
sns.distplot(df[['ATEND']],hist_kws={'color': 'blue', 'label': 'ATEND'} ,ax=ax[0])
sns.distplot(df[['ABANDO']], hist_kws={'color': 'green', 'label': 'ABANDO'},ax=ax[1])
sns.distplot(df[['ANTED_20S']], hist_kws={'color': 'red', 'label': 'ANTED_20S'},ax=ax[2])
<Axes: ylabel='Density'>
fig, ax = plt.subplots(2, 3, figsize=(30, 10))
sns.distplot(df.groupby(['Fecha'])['ATEND'].sum(),hist_kws={'color': 'blue', 'label': 'ATEND'},ax=ax[0,0])
sns.distplot(df.groupby(['Fecha'])['ABANDO'].sum(),hist_kws={'color': 'green', 'label': 'ABANDO'},ax=ax[0,1])
sns.distplot(df.groupby(['Fecha'])['ANTED_20S'].sum(),hist_kws={'color': 'red', 'label': 'ANTED_20S'},ax=ax[0,2])
sns.histplot(data=datos_agrupados_Atend, x='ATEND', hue='skill', element='step', stat='density', kde = True, common_norm=False,ax=ax[1,0])
sns.histplot(data=datos_agrupados_Abandon, x='ABANDO', hue='skill', element='step', stat='density', kde = True,common_norm=False,ax=ax[1,1])
sns.histplot(data=datos_agrupados_Atend20s, x='ANTED_20S', hue='skill', element='step', stat='density',kde = True ,common_norm=False,ax=ax[1,2])
plt.show()
datos_agregados = df.groupby(['Fecha', 'skill'])['ATEND', 'ABANDO'].sum().reset_index()
df_reemplazado = datos_agregados.replace({1205: 'A', 1201: 'B',1202: 'C',1203: 'D'})
plt.figure(figsize=(20, 6))
sns.jointplot(data=df_reemplazado, x='ABANDO', y='ATEND', kind = 'reg')
sns.jointplot(data=df_reemplazado, x='ABANDO', y='ATEND', hue = 'skill')
sns.jointplot(data=df_reemplazado, x='ABANDO', y='ATEND', hue = 'skill', kind='kde')
g = sns.lmplot(x='ABANDO', y='ATEND', hue="skill", truncate=True, height=5, data=df_reemplazado)
g.set_axis_labels("ABANDO", "ATEND")
<seaborn.axisgrid.FacetGrid at 0x7e47f5eb0790>
<Figure size 2000x600 with 0 Axes>
from statsmodels.tsa.arima.model import ARIMA
df_diff = df_g.diff().dropna()
fig, ax = plt.subplots(2, 2, figsize=(15, 10))
plot_acf(df_diff['ATEND'], lags=40, ax=ax[0,0])
plot_pacf(df_diff['ATEND'], lags=40, ax=ax[0,1])
plot_acf(df_diff['ABANDO'], lags=40, ax=ax[1,0])
plot_pacf(df_diff['ABANDO'], lags=40, ax=ax[1,1])
plt.show()
model = ARIMA(df_g['ATEND'], order=(5, 1, 0))
model_2 = ARIMA(df_g['ABANDO'], order=(2, 2, 1))
results_2 = model_2.fit()
results = model.fit()
summary_table_atend = results.summary()
summary_table_abnd = results_2.summary()
print("Resumen Atendidas")
print(tabulate(summary_table_atend.tables[1], headers='keys', tablefmt='fancy_grid'))
print("\n")
print("Resumen Abandonadas")
print(tabulate(summary_table_abnd.tables[1], headers='keys', tablefmt='fancy_grid'))
Resumen Atendidas ╒════════╤═══════════╤══════════╤════════╤═══════╤══════════╤══════════╕ │ 0 │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │ ╞════════╪═══════════╪══════════╪════════╪═══════╪══════════╪══════════╡ │ │ coef │ std err │ z │ P>|z| │ [0.025 │ 0.975] │ ├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤ │ ar.L1 │ -0.5337 │ 0.070 │ -7.637 │ 0.000 │ -0.671 │ -0.397 │ ├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤ │ ar.L2 │ -0.5449 │ 0.079 │ -6.929 │ 0.000 │ -0.699 │ -0.391 │ ├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤ │ ar.L3 │ -0.4749 │ 0.083 │ -5.743 │ 0.000 │ -0.637 │ -0.313 │ ├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤ │ ar.L4 │ -0.4269 │ 0.104 │ -4.117 │ 0.000 │ -0.630 │ -0.224 │ ├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤ │ ar.L5 │ -0.4442 │ 0.098 │ -4.545 │ 0.000 │ -0.636 │ -0.253 │ ├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤ │ sigma2 │ 1.519e+05 │ 1.27e+04 │ 11.980 │ 0.000 │ 1.27e+05 │ 1.77e+05 │ ╘════════╧═══════════╧══════════╧════════╧═══════╧══════════╧══════════╛ Resumen Abandonadas ╒════════╤══════════╤══════════╤═════════╤═══════╤═══════════╤══════════╕ │ 0 │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │ ╞════════╪══════════╪══════════╪═════════╪═══════╪═══════════╪══════════╡ │ │ coef │ std err │ z │ P>|z| │ [0.025 │ 0.975] │ ├────────┼──────────┼──────────┼─────────┼───────┼───────────┼──────────┤ │ ar.L1 │ -0.6662 │ 0.036 │ -18.724 │ 0.000 │ -0.736 │ -0.596 │ ├────────┼──────────┼──────────┼─────────┼───────┼───────────┼──────────┤ │ ar.L2 │ -0.4657 │ 0.046 │ -10.056 │ 0.000 │ -0.556 │ -0.375 │ ├────────┼──────────┼──────────┼─────────┼───────┼───────────┼──────────┤ │ ma.L1 │ -0.9999 │ 6.020 │ -0.166 │ 0.868 │ -12.799 │ 10.799 │ ├────────┼──────────┼──────────┼─────────┼───────┼───────────┼──────────┤ │ sigma2 │ 196.5610 │ 1181.872 │ 0.166 │ 0.868 │ -2119.865 │ 2512.987 │ ╘════════╧══════════╧══════════╧═════════╧═══════╧═══════════╧══════════╛
fig, ax = plt.subplots(2, 3, figsize=(15, 10))
results.resid.plot(ax=ax[0,0], color='blue', title="Residuos Atendidas")
results_2.resid.plot(ax=ax[0,1], color='red', title="Residuos Abandonadas")
results.resid.plot(kind='kde',ax=ax[1,0], color='blue')
results_2.resid.plot(kind='kde',ax=ax[1,1], color='red')
results.resid.plot(ax=ax[0,2], color='blue', title="Residuos Atendidas vs Abandonadas")
results_2.resid.plot(ax=ax[0,2], color='red')
results.resid.plot(kind='kde',ax=ax[1,2],color='blue')
results_2.resid.plot(kind='kde',ax=ax[1,2],color='red')
plt.show()
resid_summary_atend_df = pd.DataFrame(results.resid.describe(), columns=['Residuos de Atendidas'])
resid_summary_abandon_df = pd.DataFrame(results_2.resid.describe(), columns=['Residuos de Abandonadas'])
print(tabulate(resid_summary_atend_df, headers='keys', tablefmt='fancy_grid'))
print("\n")
print(tabulate(resid_summary_abandon_df, headers='keys', tablefmt='fancy_grid'))
╒═══════╤═════════════════════════╕ │ │ Residuos de Atendidas │ ╞═══════╪═════════════════════════╡ │ count │ 217 │ ├───────┼─────────────────────────┤ │ mean │ 2.26026 │ ├───────┼─────────────────────────┤ │ std │ 398.751 │ ├───────┼─────────────────────────┤ │ min │ -1155.63 │ ├───────┼─────────────────────────┤ │ 25% │ -110.231 │ ├───────┼─────────────────────────┤ │ 50% │ 23.7796 │ ├───────┼─────────────────────────┤ │ 75% │ 153.142 │ ├───────┼─────────────────────────┤ │ max │ 1174.91 │ ╘═══════╧═════════════════════════╛ ╒═══════╤═══════════════════════════╕ │ │ Residuos de Abandonadas │ ╞═══════╪═══════════════════════════╡ │ count │ 217 │ ├───────┼───────────────────────────┤ │ mean │ -0.50933 │ ├───────┼───────────────────────────┤ │ std │ 14.3276 │ ├───────┼───────────────────────────┤ │ min │ -40.3605 │ ├───────┼───────────────────────────┤ │ 25% │ -5.83378 │ ├───────┼───────────────────────────┤ │ 50% │ -1.91944 │ ├───────┼───────────────────────────┤ │ 75% │ 3.24653 │ ├───────┼───────────────────────────┤ │ max │ 94.0839 │ ╘═══════╧═══════════════════════════╛
from statsmodels.tsa.arima.model import ARIMA
n_pasos = 10
modeler = ARIMA(df_g['ATEND'], order=(5, 1, 0))
results = modeler.fit()
forecast = results.forecast(steps=n_pasos)
prediccion = results.predict(start="2023-01-01", end="2023-08-09")
plt.figure(figsize=(25, 8))
plt.plot(df_g['ATEND'], label='Entrenamiento Atendidas')
plt.plot(df_g['ANTED_20S'], label='Entrenamiento Atendidas 20s')
sns.lineplot(prediccion, label='Predicción')
plt.plot(forecast, label='Predicción Futuras')
plt.legend()
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.show()
n_pasos = 10
modeler_2 = ARIMA(df_g['ABANDO'], order=(5, 0, 2))
results_2 = modeler_2.fit()
forecast_2 = results_2.forecast(steps=n_pasos)
prediccion_2 = results_2.predict(start="2023-01-01", end="2023-08-09")
plt.figure(figsize=(25, 8))
sns.lineplot(df_g['ABANDO'], label='Predicción de entrenamiento')
sns.lineplot(prediccion_2, label='Predicción de entrenamiento')
plt.plot(forecast_2, label='Predicción Futuras')
plt.legend()
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.show()
fig, ax = plt.subplots(1, 2, figsize=(30, 10))
sns.distplot(prediccion,kde=True, color='blue', label='Atendidades Predicción',ax=ax[0])
sns.distplot(df.groupby(['Fecha'])['ATEND'].sum(),kde=True, color='red', label='Atendidades Reales',ax=ax[0])
sns.distplot(prediccion_2,kde=True, color='blue', label='Abandonadas Predicción',ax=ax[1])
sns.distplot(df.groupby(['Fecha'])['ABANDO'].sum(),kde=True, color='red', label='Abandonadas Reales',ax=ax[1])
ax[0].legend()
ax[1].legend()
plt.show()
mae = mean_absolute_error(df_g['ATEND'][-276:], prediccion)
mse = mean_squared_error(df_g['ATEND'][-276:], prediccion)
rmse = sqrt(mse)
mape = (abs((df_g['ATEND'][-n_pasos:] - prediccion) / df_g['ATEND'][-n_pasos:])).mean()
print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape * 100:.2f}%")
MAE: 277.42 MSE: 158275.05 RMSE: 397.84 MAPE: 59.26%
mae = mean_absolute_error(df_g['ABANDO'][-276:], prediccion)
mse = mean_squared_error(df_g['ABANDO'][-276:], prediccion)
rmse = sqrt(mse)
mape = (abs((df_g['ABANDO'][-276:] - prediccion) / df_g['ABANDO'][-276:])).mean()
print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape * 100:.2f}%")
MAE: 982.06 MSE: 1044841.41 RMSE: 1022.17 MAPE: inf%
from statsmodels.tsa.arima.model import ARIMA
datos_agrupados = df_reemplazado.groupby(['Fecha', 'skill'])['ATEND'].sum().reset_index()
pivot_df = datos_agrupados.pivot(index='Fecha', columns='skill', values='ATEND').fillna(0)
n_pasos = 1
for causa in pivot_df.columns:
modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(5, 1, 0))
resultado = modelo_arima.fit()
plt.figure(figsize=(30, 6))
prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])
plt.plot(pivot_df[causa], label=causa + ' - Datos reales')
plt.plot(prediccion, label=causa + ' - Predicción de entrenamiento')
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.legend()
plt.show()
for causa in pivot_df.columns:
modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(2, 1, 0))
resultado = modelo_arima.fit()
plt.figure(figsize=(30, 6))
prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])
sns.histplot(pivot_df[causa], label=causa + ' - Datos reales', kde=True)
sns.histplot(prediccion, label=causa + ' - Predicción de entrenamiento',kde=True)
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.legend()
plt.figure(figsize=(30, 6))
for causa in pivot_df.columns:
modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(2, 1, 0))
resultado = modelo_arima.fit()
prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])
sns.lineplot(pivot_df[causa], label=causa + ' - Datos reales')
sns.lineplot(prediccion, label=causa + ' - Predicción de entrenamiento')
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.legend()
plt.show()
from statsmodels.tsa.arima.model import ARIMA
datos_agrupados_2 = df_reemplazado.groupby(['Fecha', 'skill'])['ABANDO'].sum().reset_index()
pivot_df = datos_agrupados_2.pivot(index='Fecha', columns='skill', values='ABANDO').fillna(0)
n_pasos = 1
for causa in pivot_df.columns:
modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(2, 1, 0))
resultado = modelo_arima.fit()
plt.figure(figsize=(30, 6))
prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])
plt.plot(pivot_df[causa], label=causa + ' - Datos reales')
plt.plot(prediccion, label=causa + ' - Predicción de entrenamiento')
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.legend()
for causa in pivot_df.columns:
modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(2, 1, 0))
resultado = modelo_arima.fit()
plt.figure(figsize=(30, 6))
prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])
sns.histplot(pivot_df[causa], label=causa + ' - Datos reales', kde=True)
sns.histplot(prediccion, label=causa + ' - Predicción de entrenamiento',kde=True)
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.legend()
plt.figure(figsize=(30, 6))
for causa in pivot_df.columns:
modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(2, 1, 0))
resultado = modelo_arima.fit()
prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])
sns.lineplot(pivot_df[causa], label=causa + ' - Datos reales')
sns.lineplot(prediccion, label=causa + ' - Predicción de entrenamiento')
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.legend()
plt.show()